Function to load data from non-HLA. Converts all columns to the appropriate types.
load_data <- function() {
df<- read.csv("data/non_HLA.csv")
df$Sex <- df$Sex %>% as.factor()
df$Condition <- df$Condition %>%
as.factor()
num_colnames<-c("AT1R.U.mL..CO.17.", "ENO1.Normalized..CO.4218.",
"FLRT2.Normalized..CO.688.", "VM.Normalized..CO.820.",
"TUBA1B.Normalized..CO.1987.", "IFIH1.Normalized..CO.3870.",
"PTPRN.Normalized..CO.3042.", "AURKA.Normalized..CO.4892.",
"PPIA.Normalized..CO.3292.", "EIF2A.Normalized..CO.6901.",
"PECR.Normalized..CO.4120.", "PRKCH.Normalized..CO.1048.",
"CXCL11.Normalized..CO.309.", "CXCL10.Normalized..CO.285.",
"AGRIN.Normalized..CO.504.", "GAPDH.Normalized..CO.508.",
"MYOSIN.Normalized..CO.9341.", "AGT.Normalized..CO.1641.",
"CHAF1B.Normalized..CO.11722.", "PLA2R.Normalized..CO.195.",
"LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.",
"LMNA.Normalized..CO.6633.", "PRKCZ.Normalized..CO.9104.",
"LMNB1.Normalized..CO.2065.", "ARHGDIB.Normalized..CO.3918.",
"HNRNPK.Normalized..CO.845.", "IFNG.Normalized..CO.498.",
"REG3A.Normalized..CO.86.", "NUCLEOLIN.Normalized..CO.3669.",
"CD36.Normalized..CO.1591.", "TNFA.Normalized..CO.5331.",
"CXCL9.Normalized..CO.575.", "GDNF.Normalized..CO.1004.",
"COLLAGEN.I.Normalized..CO.375.", "COLLAGEN.II.Normalized..CO.155.",
"COLLAGEN.III.Normalized..CO.128.", "COLLAGEN.IV.Normalized..CO.71.",
"COLLAGEN.V.Normalized..CO.187.", "FIBRONECTIN.Normalized..CO.141.",
"Age.at.Transplant", "DSA_num")
df <- df %>% mutate(across(all_of(num_colnames), ~as.numeric(as.character(.))))
df$AGRIN.Normalized..CO.504. <- df$AGRIN.Normalized..CO.504./10
df$AGT.Normalized..CO.1641. <- df$AGT.Normalized..CO.1641./100
df$ARHGDIB.Normalized..CO.3918. <- df$ARHGDIB.Normalized..CO.3918./100
df$AURKA.Normalized..CO.4892. <- df$AURKA.Normalized..CO.4892./100
df$CD36.Normalized..CO.1591. <- df$CD36.Normalized..CO.1591./100
df$CHAF1B.Normalized..CO.11722. <- df$CHAF1B.Normalized..CO.11722./100
df$COLLAGEN.I.Normalized..CO.375. <- df$COLLAGEN.I.Normalized..CO.375./10
df$COLLAGEN.II.Normalized..CO.155. <- df$COLLAGEN.II.Normalized..CO.155./10
df$COLLAGEN.III.Normalized..CO.128. <- df$COLLAGEN.III.Normalized..CO.128./10
df$COLLAGEN.IV.Normalized..CO.71. <- df$COLLAGEN.IV.Normalized..CO.71./10
df$COLLAGEN.V.Normalized..CO.187. <- df$COLLAGEN.V.Normalized..CO.187./10
df$CXCL10.Normalized..CO.285. <- df$CXCL10.Normalized..CO.285./10
df$CXCL11.Normalized..CO.309. <- df$CXCL11.Normalized..CO.309./10
df$CXCL9.Normalized..CO.575. <- df$CXCL9.Normalized..CO.575./100
df$EIF2A.Normalized..CO.6901. <- df$EIF2A.Normalized..CO.6901./100
df$ENO1.Normalized..CO.4218. <- df$ENO1.Normalized..CO.4218./100
df$FIBRONECTIN.Normalized..CO.141. <- df$FIBRONECTIN.Normalized..CO.141./10
df$FLRT2.Normalized..CO.688. <- df$FLRT2.Normalized..CO.688./100
df$GAPDH.Normalized..CO.508. <- df$GAPDH.Normalized..CO.508./100
df$GDNF.Normalized..CO.1004. <- df$GDNF.Normalized..CO.1004./100
df$GSTT1.Normalized..CO.6136. <- df$GSTT1.Normalized..CO.6136./100
df$HNRNPK.Normalized..CO.845. <- df$HNRNPK.Normalized..CO.845./100
df$IFNG.Normalized..CO.498. <- df$IFNG.Normalized..CO.498./100
df$IFIH1.Normalized..CO.3870. <- df$IFIH1.Normalized..CO.3870./100
df$LG3.Normalized..CO.3801. <- df$LG3.Normalized..CO.3801./100
df$LMNA.Normalized..CO.6633. <- df$LMNA.Normalized..CO.6633./100
df$LMNB1.Normalized..CO.2065. <- df$LMNB1.Normalized..CO.2065./100
df$MYOSIN.Normalized..CO.9341. <- df$MYOSIN.Normalized..CO.9341./100
df$NUCLEOLIN.Normalized..CO.3669. <- df$NUCLEOLIN.Normalized..CO.3669./10
df$PECR.Normalized..CO.4120. <- df$PECR.Normalized..CO.4120./100
df$PLA2R.Normalized..CO.195. <- df$PLA2R.Normalized..CO.195./100
df$PPIA.Normalized..CO.3292. <- df$PPIA.Normalized..CO.3292./100
df$PRKCZ.Normalized..CO.9104. <- df$PRKCZ.Normalized..CO.9104./100
df$PRKCH.Normalized..CO.1048. <- df$PRKCH.Normalized..CO.1048./100
df$PTPRN.Normalized..CO.3042. <- df$PTPRN.Normalized..CO.3042./100
df$REG3A.Normalized..CO.86. <- df$REG3A.Normalized..CO.86./10
df$TNFA.Normalized..CO.5331. <- df$TNFA.Normalized..CO.5331./100
df$TUBA1B.Normalized..CO.1987. <- df$TUBA1B.Normalized..CO.1987./100
df$VM.Normalized..CO.820. <- df$VM.Normalized..CO.820./100
return(df)
}
Function to replace normalized values with “1” for above cutoff and “0” for below. Cutoff numbers are in the column names.
process_columns_with_cutoff <- function(colnames_vector, df) {
# Loop through each column name provided in the vector
for (colname in colnames_vector) {
cutoff_str <- sub(".*CO\\.", "", colname)
cutoff_str <- gsub("\\..*", "", cutoff_str)
cutoff <- as.numeric(cutoff_str)
df[[colname]] <- ifelse(df[[colname]] > cutoff, 1, 0)
#df[[colname]] <- as.factor(df[[colname]])
}
return(df)
}
Function to plot distributions of continuous variables.
plot_variable_distributions <- function(data, variable_names, outcome_var) {
# Pivot the data frame to long format
data_long <- pivot_longer(data, cols = all_of(variable_names), names_to = "variable", values_to = "value")
# Filter out rows with NA values in 'value' to avoid issues in plotting
data_long <- na.omit(data_long)
# Plot data with aes() adjusted to include color based on the outcome variable
p <- ggplot(data_long, aes(x = value, fill = as.factor(get(outcome_var)), color = as.factor(get(outcome_var)))) +
geom_density(alpha = 0.3) + # Adjust alpha for fill transparency
facet_wrap(~variable, scales = "free", nrow = 20) +
scale_fill_brewer(palette = "Set1", name = outcome_var) +
scale_color_brewer(palette = "Set1", name = outcome_var) + # Add this line to use consistent color palette for lines
theme_minimal() +
theme(legend.position = "bottom") +
labs(x = "Value", y = "Density")
# Alter text for readability
p <- p + theme(strip.text = element_text(size = 8), # Text size
strip.text.x = element_text(angle = 0, hjust = 0.5), # Adjust angle and horizontal justification
strip.background = element_rect(fill = "white")) # Change background color for visibility
print(p)
ggsave("variable_distributions.svg")
}
Function that returns subsetted data frame based on the column names provided
create_subset_df <- function(original_df, colnames_array) {
# Ensure that the column names exist in the original dataframe
valid_colnames <- colnames_array[colnames_array %in% names(original_df)]
subset_df <- original_df[, valid_colnames, drop = FALSE]
return(subset_df)
}
Return all highly correlated variables
find_highly_correlated_variables <- function(dataframe, column_names, threshold = 0.8) {
subset_df <- dataframe[, column_names]
corr_matrix <- cor(subset_df, use = "complete.obs") # 'complete.obs' handles missing values by using complete cases
high_corr_pairs <- which(abs(corr_matrix) > threshold & lower.tri(corr_matrix), arr.ind = TRUE)
# Extract the variable names for these pairs
high_corr_vars <- unique(c(column_names[high_corr_pairs[, "row"]], column_names[high_corr_pairs[, "col"]]))
return(high_corr_vars)
}
Write formulas for the multinomial logistic regression
create_pom_formula <- function(response_var, clin_cofactors, independent_vars, df) {
all_vars <- c(response_var, clin_cofactors, independent_vars)
missing_vars <- setdiff(all_vars, names(df))
if(length(missing_vars) > 0) { stop(paste("The following variables are missing in the dataframe:",
paste(missing_vars, collapse=", "), "."))
}
predictors <- c(clin_cofactors, independent_vars)
formula_str <- paste(response_var, "~", paste(predictors, collapse = " + "))
return(as.formula(formula_str))
}
Function to create a PCA column
addPCAComponent <- function(dataframe, columns, pcaColumnName) {
subset_df <- dataframe[, columns, drop = FALSE]
pca_result <- prcomp(subset_df, center = TRUE, scale. = TRUE)
# Extract the first principal component
first_pca_component <- pca_result$x[, 1]
dataframe[[pcaColumnName]] <- first_pca_component
return(dataframe)
}
Saving the names of the independent variables and the continuous independent variables
#missing fibronectin and pla2r because they have low levels of fluorescence => inaccurate OR.
tot_nHLA_var<-c("AT1R.U.mL..CO.17.",
"ENO1.Normalized..CO.4218.", "FLRT2.Normalized..CO.688.",
"VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.",
"IFIH1.Normalized..CO.3870.", "PTPRN.Normalized..CO.3042.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"AGT.Normalized..CO.1641.", "CHAF1B.Normalized..CO.11722.",
"LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.", "LMNA.Normalized..CO.6633.",
"PRKCZ.Normalized..CO.9104.", "LMNB1.Normalized..CO.2065.",
"ARHGDIB.Normalized..CO.3918.", "HNRNPK.Normalized..CO.845.",
"IFNG.Normalized..CO.498.", "REG3A.Normalized..CO.86.",
"NUCLEOLIN.Normalized..CO.3669.", "CD36.Normalized..CO.1591.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.")
tot_nHLA_var2<-c("AT1R.U.mL..CO.17.",
"ENO1.Normalized..CO.4218.", "FLRT2.Normalized..CO.688.",
"VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.",
"IFIH1.Normalized..CO.3870.", "PTPRN.Normalized..CO.3042.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"AGT.Normalized..CO.1641.", "CHAF1B.Normalized..CO.11722.",
"PLA2R.Normalized..CO.195.", "LG3.Normalized..CO.3801.",
"GSTT1.Normalized..CO.6136.", "LMNA.Normalized..CO.6633.",
"PRKCZ.Normalized..CO.9104.", "LMNB1.Normalized..CO.2065.",
"ARHGDIB.Normalized..CO.3918.", "HNRNPK.Normalized..CO.845.",
"IFNG.Normalized..CO.498.", "REG3A.Normalized..CO.86.",
"NUCLEOLIN.Normalized..CO.3669.", "CD36.Normalized..CO.1591.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.",
"FIBRONECTIN.Normalized..CO.141.")
tot_clin_var<-c("Age.at.Transplant", "DSA_num")
tot_independant_var_cont<-c(tot_nHLA_var, tot_clin_var)
nc_var<-c("AT1R.U.mL..CO.17.", "ENO1.Normalized..CO.4218.",
"FLRT2.Normalized..CO.688.", "IFIH1.Normalized..CO.3870.",
"AURKA.Normalized..CO.4892.", "PPIA.Normalized..CO.3292.",
"EIF2A.Normalized..CO.6901.", "PECR.Normalized..CO.4120.",
"PRKCH.Normalized..CO.1048.", "CXCL11.Normalized..CO.309.",
"CXCL10.Normalized..CO.285.", "AGRIN.Normalized..CO.504.",
"GAPDH.Normalized..CO.508.", "MYOSIN.Normalized..CO.9341.",
"CHAF1B.Normalized..CO.11722.", "PLA2R.Normalized..CO.195.",
"LG3.Normalized..CO.3801.", "GSTT1.Normalized..CO.6136.",
"LMNA.Normalized..CO.6633.", "PRKCZ.Normalized..CO.9104.",
"LMNB1.Normalized..CO.2065.", "ARHGDIB.Normalized..CO.3918.",
"HNRNPK.Normalized..CO.845.", "IFNG.Normalized..CO.498.",
"REG3A.Normalized..CO.86.", "NUCLEOLIN.Normalized..CO.3669.",
"TNFA.Normalized..CO.5331.", "CXCL9.Normalized..CO.575.",
"GDNF.Normalized..CO.1004.", "COLLAGEN.I.Normalized..CO.375.",
"COLLAGEN.II.Normalized..CO.155.", "COLLAGEN.III.Normalized..CO.128.",
"COLLAGEN.IV.Normalized..CO.71.", "COLLAGEN.V.Normalized..CO.187.",
"FIBRONECTIN.Normalized..CO.141.")
Replace normalized fluorescence values with “1”, presence or “0” absence.
df_nocut<- load_data()
#not doing because screws analysis
#df<-process_columns_with_cutoff(tot_nHLA_var, df_nocut)
df<-df_nocut
Plot the distributions of continuous variables
plot_variable_distributions(df_nocut, tot_independant_var_cont, "Condition")
Subset into pre and post transplant data
df_pre<- subset(df, Pre==1)
df_post<- subset(df, Pre==0)
Correlation plot for pre-transplant data
df_pre_nc <- df_nocut %>% filter(Pre==1)
create_subset_df(df_pre_nc, tot_independant_var_cont) %>% cor() %>% corrplot()
find_highly_correlated_variables(df_pre_nc, tot_independant_var_cont, 0.8)
## [1] "VM.Normalized..CO.820." "TUBA1B.Normalized..CO.1987."
## [3] "AGT.Normalized..CO.1641." "CD36.Normalized..CO.1591."
## [5] "PPIA.Normalized..CO.3292." "COLLAGEN.III.Normalized..CO.128."
## [7] "COLLAGEN.IV.Normalized..CO.71." "FLRT2.Normalized..CO.688."
## [9] "COLLAGEN.I.Normalized..CO.375."
Correlation plot for post-transplant data
df_post_nc <- df_nocut %>% filter(Pre==0)
create_subset_df(df_post_nc, tot_independant_var_cont) %>% cor() %>% corrplot()
find_highly_correlated_variables(df_post_nc, tot_independant_var_cont, 0.8)
## [1] "TUBA1B.Normalized..CO.1987." "CD36.Normalized..CO.1591."
## [3] "PPIA.Normalized..CO.3292." "AGT.Normalized..CO.1641."
## [5] "COLLAGEN.III.Normalized..CO.128." "FLRT2.Normalized..CO.688."
## [7] "VM.Normalized..CO.820." "COLLAGEN.I.Normalized..CO.375."
variables that are colinear are replaced with a PCA
all_colin<-c("VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.", "AGT.Normalized..CO.1641.", "CD36.Normalized..CO.1591.", "PPIA.Normalized..CO.3292.", "COLLAGEN.III.Normalized..CO.128.", "COLLAGEN.IV.Normalized..CO.71.", "FLRT2.Normalized..CO.688.", "COLLAGEN.I.Normalized..CO.375.")
colin_set1<-c("VM.Normalized..CO.820.", "TUBA1B.Normalized..CO.1987.", "AGT.Normalized..CO.1641.", "CD36.Normalized..CO.1591.", "PPIA.Normalized..CO.3292.", "FLRT2.Normalized..CO.688.")
colin_set2<-all_colin[!all_colin %in% colin_set1]
df_pre<-addPCAComponent(df_pre, colin_set1, "PCA1")
df_post<-addPCAComponent(df_post, colin_set1, "PCA1")
df_pre<-addPCAComponent(df_pre, colin_set2, "PCA2")
df_post<-addPCAComponent(df_post, colin_set2, "PCA2")
df_pre<-df_pre %>% dplyr::select(-all_of(all_colin))
df_post<-df_post %>% dplyr::select(-all_of(all_colin))
tot_nHLA_var<-tot_nHLA_var[!tot_nHLA_var %in% all_colin]
tot_nHLA_var<-c(tot_nHLA_var, "PCA1", "PCA2")
tot_independant_var_cont<-tot_independant_var_cont[!tot_independant_var_cont %in% all_colin]
tot_independant_var_cont<-c(tot_independant_var_cont, "PCA1", "PCA2")
Function to remove Columns in which there is only one instance (only presence or only absence) as this does not help with prediction.
find_factor_columns_with_missing_level <- function(dataframe, columns) {
# Initialize an empty vector to store the names of columns that meet the criteria
columns_with_missing_level <- c()
for (col_name in columns) {
if (is.factor(dataframe[[col_name]]) && length(levels(dataframe[[col_name]])) == 2) {
level_counts <- table(dataframe[[col_name]])
# Check if any of the levels has no instances
if (any(level_counts == 0)) {
columns_with_missing_level <- c(columns_with_missing_level, col_name)
}
}
}
return(columns_with_missing_level)
}
Create datasets for DGF and failure analysis
df_pre_dgf<- subset(df_pre, !Condition==2)
df_pre_dgf$Condition <- factor(df_pre_dgf$Condition)
df_post_dgf<-subset(df_post, !Condition==2)
df_post_dgf$Condition <- factor(df_post_dgf$Condition)
df_pre_fail<-subset(df_pre, !Condition==1)
df_pre_fail$Condition <- factor(df_pre_fail$Condition)
df_post_fail<-subset(df_post, !Condition==1)
df_post_fail$Condition <- factor(df_post_fail$Condition)
Multinomial logistic regression for pre-transplant data
pre_dgf_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_pre)
pre_dgf_model<-glm(pre_dgf_formula, family = "binomial",data = df_pre_dgf)
#pre_dgf_step.model<-stepAIC(pre_dgf_model, direction = "backward", trace = FALSE)
#optireg_pre_dgf<-pre_dgf_step.model$terms
#pre_dgf_model_opt<-glm(optireg_pre_dgf, family = "binomial",data = df_pre_dgf)
summary(pre_dgf_model)
##
## Call:
## glm(formula = pre_dgf_formula, family = "binomial", data = df_pre_dgf)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2830970 1.9045903 -0.674 0.5005
## Age.at.Transplant 0.0428024 0.0336888 1.271 0.2039
## DSA_num 1.0914282 2.1455947 0.509 0.6110
## AT1R.U.mL..CO.17. -0.0148840 0.0738407 -0.202 0.8403
## ENO1.Normalized..CO.4218. -0.0003579 0.0389132 -0.009 0.9927
## IFIH1.Normalized..CO.3870. -0.0046864 0.0182464 -0.257 0.7973
## PTPRN.Normalized..CO.3042. -0.0243427 0.0162702 -1.496 0.1346
## AURKA.Normalized..CO.4892. 0.0626144 0.0313737 1.996 0.0460 *
## EIF2A.Normalized..CO.6901. 0.0061056 0.0183817 0.332 0.7398
## PECR.Normalized..CO.4120. 0.0833783 0.0541459 1.540 0.1236
## PRKCH.Normalized..CO.1048. -0.0433572 0.0383565 -1.130 0.2583
## CXCL11.Normalized..CO.309. -0.0161343 0.0195859 -0.824 0.4101
## CXCL10.Normalized..CO.285. -0.0109939 0.0333204 -0.330 0.7414
## AGRIN.Normalized..CO.504. -0.0015165 0.0069283 -0.219 0.8267
## GAPDH.Normalized..CO.508. 0.0011635 0.0253096 0.046 0.9633
## MYOSIN.Normalized..CO.9341. -0.0336616 0.0206109 -1.633 0.1024
## CHAF1B.Normalized..CO.11722. -0.0305358 0.0203113 -1.503 0.1327
## LG3.Normalized..CO.3801. 0.1058020 0.0473588 2.234 0.0255 *
## GSTT1.Normalized..CO.6136. -0.0367313 0.0209455 -1.754 0.0795 .
## LMNA.Normalized..CO.6633. 0.0520271 0.0376736 1.381 0.1673
## PRKCZ.Normalized..CO.9104. -0.0082422 0.0161995 -0.509 0.6109
## LMNB1.Normalized..CO.2065. -0.0081659 0.0385510 -0.212 0.8322
## ARHGDIB.Normalized..CO.3918. 0.0297790 0.0237262 1.255 0.2094
## HNRNPK.Normalized..CO.845. -0.0385209 0.0638070 -0.604 0.5460
## IFNG.Normalized..CO.498. 0.0571146 0.0513555 1.112 0.2661
## REG3A.Normalized..CO.86. 0.0052798 0.0232114 0.227 0.8201
## NUCLEOLIN.Normalized..CO.3669. -0.0376766 0.0172336 -2.186 0.0288 *
## TNFA.Normalized..CO.5331. 0.1892495 0.0950230 1.992 0.0464 *
## CXCL9.Normalized..CO.575. 0.3223528 0.1694657 1.902 0.0571 .
## GDNF.Normalized..CO.1004. 0.0552137 0.0987410 0.559 0.5760
## COLLAGEN.II.Normalized..CO.155. -0.0395747 0.1083155 -0.365 0.7148
## COLLAGEN.V.Normalized..CO.187. -0.2499026 0.1373879 -1.819 0.0689 .
## PCA1 -0.6399012 0.4976879 -1.286 0.1985
## PCA2 0.3619538 0.7372108 0.491 0.6234
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 116.227 on 85 degrees of freedom
## Residual deviance: 71.231 on 52 degrees of freedom
## AIC: 139.23
##
## Number of Fisher Scoring iterations: 7
pre_fail_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_pre)
pre_fail_model<-glm(pre_fail_formula, family = "binomial",data = df_pre_fail)
#pre_fail_step.model<-stepAIC(pre_fail_model, direction = "backward", trace = FALSE)
#optireg_pre_fail<-pre_fail_step.model$terms
#pre_fail_model_opt<-glm(optireg_pre_fail, family = "binomial",data = df_pre_fail)
summary(pre_fail_model)
##
## Call:
## glm(formula = pre_fail_formula, family = "binomial", data = df_pre_fail)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.916674 1.228937 2.373 0.01763 *
## Age.at.Transplant -0.039896 0.017205 -2.319 0.02040 *
## DSA_num 0.791697 0.580884 1.363 0.17291
## AT1R.U.mL..CO.17. 0.070367 0.034358 2.048 0.04055 *
## ENO1.Normalized..CO.4218. 0.008691 0.021780 0.399 0.68985
## IFIH1.Normalized..CO.3870. -0.013321 0.014500 -0.919 0.35825
## PTPRN.Normalized..CO.3042. -0.017953 0.010989 -1.634 0.10230
## AURKA.Normalized..CO.4892. 0.023580 0.023897 0.987 0.32379
## EIF2A.Normalized..CO.6901. 0.004931 0.009506 0.519 0.60394
## PECR.Normalized..CO.4120. 0.031161 0.028303 1.101 0.27091
## PRKCH.Normalized..CO.1048. -0.017382 0.015724 -1.105 0.26894
## CXCL11.Normalized..CO.309. -0.009347 0.004487 -2.083 0.03722 *
## CXCL10.Normalized..CO.285. 0.017628 0.014946 1.179 0.23823
## AGRIN.Normalized..CO.504. 0.002586 0.003396 0.762 0.44634
## GAPDH.Normalized..CO.508. -0.050911 0.026907 -1.892 0.05847 .
## MYOSIN.Normalized..CO.9341. -0.015287 0.010465 -1.461 0.14407
## CHAF1B.Normalized..CO.11722. 0.004292 0.012760 0.336 0.73660
## LG3.Normalized..CO.3801. 0.067364 0.027074 2.488 0.01284 *
## GSTT1.Normalized..CO.6136. -0.008830 0.010595 -0.833 0.40462
## LMNA.Normalized..CO.6633. -0.003124 0.019711 -0.158 0.87409
## PRKCZ.Normalized..CO.9104. -0.004527 0.010133 -0.447 0.65503
## LMNB1.Normalized..CO.2065. -0.041961 0.037736 -1.112 0.26615
## ARHGDIB.Normalized..CO.3918. -0.021202 0.016216 -1.307 0.19105
## HNRNPK.Normalized..CO.845. -0.022611 0.033465 -0.676 0.49925
## IFNG.Normalized..CO.498. 0.026272 0.021535 1.220 0.22249
## REG3A.Normalized..CO.86. 0.011244 0.008161 1.378 0.16829
## NUCLEOLIN.Normalized..CO.3669. -0.021701 0.008362 -2.595 0.00945 **
## TNFA.Normalized..CO.5331. 0.086481 0.056294 1.536 0.12448
## CXCL9.Normalized..CO.575. 0.045367 0.084316 0.538 0.59054
## GDNF.Normalized..CO.1004. 0.011658 0.050795 0.230 0.81848
## COLLAGEN.II.Normalized..CO.155. 0.081847 0.051395 1.592 0.11128
## COLLAGEN.V.Normalized..CO.187. -0.097263 0.059283 -1.641 0.10087
## PCA1 -0.165889 0.336398 -0.493 0.62192
## PCA2 -0.352706 0.359910 -0.980 0.32709
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.77 on 152 degrees of freedom
## Residual deviance: 126.84 on 119 degrees of freedom
## AIC: 194.84
##
## Number of Fisher Scoring iterations: 8
Multinomial logistic regression for post-transplant data
post_dgf_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_post)
post_dgf_model<-glm(post_dgf_formula, family = "binomial",data = df_post_dgf)
#post_dgf_step.model<-stepAIC(post_dgf_model, direction = "backward", trace = FALSE)
#optireg_post_dgf<-post_dgf_step.model$terms
#post_dgf_model_opt<-glm(optireg_post_dgf, family = "binomial",data = df_post_dgf)
summary(post_dgf_model)
##
## Call:
## glm(formula = post_dgf_formula, family = "binomial", data = df_post_dgf)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.9365147 2.0445183 -0.947 0.3436
## Age.at.Transplant 0.0449142 0.0317984 1.412 0.1578
## DSA_num 2.1436691 2.0486757 1.046 0.2954
## AT1R.U.mL..CO.17. -0.0467240 0.0545789 -0.856 0.3920
## ENO1.Normalized..CO.4218. 0.0431505 0.0533377 0.809 0.4185
## IFIH1.Normalized..CO.3870. -0.0004939 0.0250485 -0.020 0.9843
## PTPRN.Normalized..CO.3042. -0.0263981 0.0238215 -1.108 0.2678
## AURKA.Normalized..CO.4892. 0.0774523 0.0322191 2.404 0.0162 *
## EIF2A.Normalized..CO.6901. 0.0089806 0.0194839 0.461 0.6449
## PECR.Normalized..CO.4120. 0.1263991 0.0684166 1.847 0.0647 .
## PRKCH.Normalized..CO.1048. -0.0591217 0.0659747 -0.896 0.3702
## CXCL11.Normalized..CO.309. -0.0280399 0.0406095 -0.690 0.4899
## CXCL10.Normalized..CO.285. -0.0299910 0.0507300 -0.591 0.5544
## AGRIN.Normalized..CO.504. -0.0321584 0.0185673 -1.732 0.0833 .
## GAPDH.Normalized..CO.508. -0.0621826 0.0427140 -1.456 0.1455
## MYOSIN.Normalized..CO.9341. -0.0510456 0.0246392 -2.072 0.0383 *
## CHAF1B.Normalized..CO.11722. -0.0360798 0.0261617 -1.379 0.1679
## LG3.Normalized..CO.3801. 0.1121599 0.0744646 1.506 0.1320
## GSTT1.Normalized..CO.6136. -0.0414309 0.0243092 -1.704 0.0883 .
## LMNA.Normalized..CO.6633. -0.0345304 0.0586055 -0.589 0.5557
## PRKCZ.Normalized..CO.9104. -0.0232599 0.0175539 -1.325 0.1852
## LMNB1.Normalized..CO.2065. 0.0775619 0.0663910 1.168 0.2427
## ARHGDIB.Normalized..CO.3918. 0.0177367 0.0272987 0.650 0.5159
## HNRNPK.Normalized..CO.845. -0.0590927 0.0823071 -0.718 0.4728
## IFNG.Normalized..CO.498. 0.0597575 0.0480484 1.244 0.2136
## REG3A.Normalized..CO.86. 0.0442035 0.0445267 0.993 0.3208
## NUCLEOLIN.Normalized..CO.3669. -0.0135224 0.0121236 -1.115 0.2647
## TNFA.Normalized..CO.5331. 0.1745214 0.1783264 0.979 0.3277
## CXCL9.Normalized..CO.575. 0.1897164 0.2068158 0.917 0.3590
## GDNF.Normalized..CO.1004. 0.1629479 0.0905470 1.800 0.0719 .
## COLLAGEN.II.Normalized..CO.155. 0.0390213 0.1743071 0.224 0.8229
## COLLAGEN.V.Normalized..CO.187. 0.0401761 0.0817565 0.491 0.6231
## PCA1 -0.8350594 0.5711057 -1.462 0.1437
## PCA2 -0.1234494 0.3200662 -0.386 0.6997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 116.227 on 85 degrees of freedom
## Residual deviance: 69.788 on 52 degrees of freedom
## AIC: 137.79
##
## Number of Fisher Scoring iterations: 8
post_fail_formula<-create_pom_formula("Condition", tot_clin_var, tot_nHLA_var, df_post)
post_fail_model<-glm(post_fail_formula, family = "binomial",data = df_post_fail)
#post_fail_step.model<-stepAIC(post_fail_model, direction = "backward", trace = FALSE)
#optireg_post_fail<-post_fail_step.model$terms
#post_fail_model_opt<-glm(optireg_post_fail, family = "binomial",data = df_post_fail)
summary(post_fail_model)
##
## Call:
## glm(formula = post_fail_formula, family = "binomial", data = df_post_fail)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.830994 1.125440 1.627 0.10376
## Age.at.Transplant -0.042412 0.017361 -2.443 0.01457 *
## DSA_num 2.169908 0.670093 3.238 0.00120 **
## AT1R.U.mL..CO.17. 0.025368 0.031583 0.803 0.42184
## ENO1.Normalized..CO.4218. -0.005129 0.021416 -0.240 0.81071
## IFIH1.Normalized..CO.3870. -0.045777 0.029042 -1.576 0.11497
## PTPRN.Normalized..CO.3042. -0.009943 0.012812 -0.776 0.43774
## AURKA.Normalized..CO.4892. -0.008551 0.022032 -0.388 0.69793
## EIF2A.Normalized..CO.6901. 0.017606 0.012100 1.455 0.14565
## PECR.Normalized..CO.4120. -0.005563 0.041362 -0.134 0.89302
## PRKCH.Normalized..CO.1048. 0.029017 0.022836 1.271 0.20386
## CXCL11.Normalized..CO.309. -0.010330 0.010111 -1.022 0.30692
## CXCL10.Normalized..CO.285. -0.016401 0.014396 -1.139 0.25457
## AGRIN.Normalized..CO.504. -0.003696 0.005282 -0.700 0.48410
## GAPDH.Normalized..CO.508. 0.005447 0.035031 0.155 0.87643
## MYOSIN.Normalized..CO.9341. -0.039516 0.015117 -2.614 0.00895 **
## CHAF1B.Normalized..CO.11722. 0.017552 0.014781 1.187 0.23504
## LG3.Normalized..CO.3801. 0.161777 0.059839 2.704 0.00686 **
## GSTT1.Normalized..CO.6136. 0.012148 0.012134 1.001 0.31677
## LMNA.Normalized..CO.6633. 0.010705 0.026859 0.399 0.69022
## PRKCZ.Normalized..CO.9104. 0.008211 0.012223 0.672 0.50174
## LMNB1.Normalized..CO.2065. -0.020385 0.043703 -0.466 0.64089
## ARHGDIB.Normalized..CO.3918. -0.039147 0.025609 -1.529 0.12636
## HNRNPK.Normalized..CO.845. -0.097622 0.048342 -2.019 0.04344 *
## IFNG.Normalized..CO.498. 0.045920 0.028172 1.630 0.10310
## REG3A.Normalized..CO.86. 0.005473 0.015182 0.360 0.71849
## NUCLEOLIN.Normalized..CO.3669. -0.013220 0.008047 -1.643 0.10044
## TNFA.Normalized..CO.5331. 0.044417 0.065844 0.675 0.49994
## CXCL9.Normalized..CO.575. 0.143552 0.115775 1.240 0.21500
## GDNF.Normalized..CO.1004. 0.099958 0.112486 0.889 0.37420
## COLLAGEN.II.Normalized..CO.155. 0.127697 0.087526 1.459 0.14457
## COLLAGEN.V.Normalized..CO.187. 0.010814 0.081212 0.133 0.89407
## PCA1 0.038354 0.266146 0.144 0.88541
## PCA2 -0.273048 0.207891 -1.313 0.18904
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 194.77 on 152 degrees of freedom
## Residual deviance: 109.88 on 119 degrees of freedom
## AIC: 177.88
##
## Number of Fisher Scoring iterations: 8
Plotting the Odds Ratios
library(forestmodel)
to_rm<-c("COLLAGEN.V.Normalized..CO.187.", "PCA1", "PCA2", "FIBRONECTIN.Normalized..CO.141.", "PLA2R.Normalized..CO.195.", "DSA_num", "Age.at.Transplant")
to_include<-tot_independant_var_cont[!tot_independant_var_cont %in% to_rm]
f_pre<-forest_model(model_list=list("Delayed Graft function"=pre_dgf_model, "Organ Rejection"=pre_fail_model), covariates = to_include, format_options = forest_model_format_options(point_size = 2))
f_pre
ggsave("pre_trans_plot.svg")
f_post<-forest_model(model_list=list("Delayed Graft function"=post_dgf_model, "Organ Rejection"=post_fail_model), covariates = to_include, format_options = forest_model_format_options(point_size =2))
f_post
ggsave("post_trans_plot.svg")
```{}